#THE CODE BELOW CAN BE WRITTEN IN A MORE EFFICIENT WAY, BUT IT WILL DO THE JOB FOR YOUR HOMEWORK# #ped file with header pedigree2.ped <- read.table(file="pedigree2.ped",header=FALSE,sep=" ") nr.of.allele.columns <- length(names(pedigree2.ped[,7:ncol(pedigree2.ped)])) marker.nrs <- 1: (nr.of.allele.columns /2) names <- paste("SNP",sort(rep(marker.nrs,2)),c("a","b"),sep="") names(pedigree2.ped) <- c("PID","ID","FID","MID","sex","status",names) write.table(pedigree2.ped,file="pedigree.ped",col.names=TRUE,row.names=FALSE,sep=" ",quote=FALSE) # convert from two columns to one (this may take a while) data <- read.table("pedigree.ped",header=TRUE) SNPSvariantcountmat <- NULL k=1 for (k in 1:1000){ SNPSdata = data[,c(5+2*k,6+2*k)] SNPSdata[SNPSdata[,1]==0,1] <- NA SNPSdata[SNPSdata[,2]==0,2] <- NA letters = c(1, 2) # 0: refers to missingness in the file counts = rep(NA,2) counts[1] = sum(SNPSdata==1) counts[2] = sum(SNPSdata==2) common = (letters[order(counts, decreasing=TRUE)])[1] variant = (letters[order(counts, decreasing=TRUE)])[2] SNPSvariantcount = rep(NA,nrow(SNPSdata)) for (j in 1:nrow(SNPSdata)){ if (SNPSdata[j,1]==common & SNPSdata[j,2]==common) SNPSvariantcount[j] = 0 if (SNPSdata[j,1]==common & SNPSdata[j,2]==variant) SNPSvariantcount[j] = 1 if (SNPSdata[j,1]==variant & SNPSdata[j,2]==common) SNPSvariantcount[j] = 1 if (SNPSdata[j,1]==variant & SNPSdata[j,2]==variant) SNPSvariantcount[j] = 2 } # if (sum(is.na(SNPSvariantcount))>0) print("Error: non-conforming SNP!") SNPSvariantcountmat = cbind(SNPSvariantcountmat,SNPSvariantcount) } data <- cbind(data[,1:6],SNPSvariantcountmat) #we retrieve the ped ids, sex and status from original data and merge it to our new SNP data names <- paste("SNP",seq(1,1000,1),sep="") #create snp names from 1 to 1000 names(data) <- c("PID","ID","FID","MID","sex","status",names) write.table(data,file="data.dat",col.names=TRUE,row.names=FALSE,sep=" ",quote=FALSE) #Now suppose that SNPphe is your merged file: cbind(data,phecolumn) with phecolumn the trait retrieved from the phe file ## so you still need to obtain phecolumn yourself .... #Then the following commands are preparatory to use the SNPassoc tools. SNP012 <- SNPphe[,1:(ncol(SNPphe)-1)] library(genetics) SNPgenotype <- as.data.frame(SNP012) #no missing data assumed SNPgenotype[SNP012==0]<-"A/A" SNPgenotype[SNP012==1]<-"A/B" SNPgenotype[SNP012==2]<-"B/B" SNPdata <- makeGenotypes(data.frame(SNPgenotype)) library(SNPassoc) SNPandPHE <- as.data.frame(cbind(SNPdata,SNPphe[,ncol(SNPphe)])) SNPandPHE <- setupSNP(SNPandPHE, 1:(ncol(SNPphe)-1), sort = FALSE, info, sep = "/") names(SNPandPHE)[1] <- "status" All<-WGassociation(status~1,data=SNPandPHE,model="all") print(All) pvalAll<-pvalues(All) WGstats(All) plot(All)